library(pacman)
pacman::p_load(tidyverse,data.table,broom,readxl,openxlsx,latex2exp,ggpubr,ggpmisc)
rm(list=ls())
################################################################################

input_folder = 'results_ic/lf'
output_folder = '../../output/figures/appendix'

###Spatial units
spatial_id_a='county_id' 
spatial_id_b='amc_id' 

#Data
df = fread('../../data/landuse/clean/geographicunits_argentina.csv.gz')
setnames(df, old = c(spatial_id_a),  new = c('spatial_id'))
df_a_units = df[, list(country,spatial_id)]
df = fread('../../data/landuse/clean/geographicunits_brazil.csv.gz')
setnames(df, old = c(spatial_id_b),  new = c('spatial_id'))
df_b_units = df[, list(country,spatial_id)]
df_sa_units = unique(rbind(df_a_units,df_b_units))

#Model
df = setDT(read.xlsx('aux_params/parameters.xlsx', sheet = 'supply'))[,list(spatial_id_name, land_ha)]
setnames(df, old=c('spatial_id_name'), new=c('spatial_id') )
df_u = merge(df, df_sa_units, by='spatial_id',all.x=T,sort=F)

###Weights
df = setDT(read.xlsx('aux_params/parameters.xlsx', sheet = 'weights'))
df$country = df_u$country
setnames(df, old=c('spatial_id_name'), new=c('spatial_id') )
df$land_ha = df_u$land_ha
df_w = df[, list(country,spatial_id, weight_beef,weight_crops,land_ha)]


########################################## MODEL

#Land
df = fread(paste0(input_folder,'/L.csv'))[,list(V1,V2)]
colnames(df) <- c('pasture_ha','crops_ha')
df$country = df_u$country
df$spatial_id = df_u$spatial_id
df$source='model'
df_land_m = df[,list(country,spatial_id,pasture_ha,crops_ha,source)]

#Output
df = fread(paste0(input_folder,'/Q.csv'))
colnames(df) <- c('beef_output_t','crops_output_t')
df$country = df_u$country
df$spatial_id = df_u$spatial_id
df$source='model'
df_output_m = df[,list(country,spatial_id,crops_output_t,beef_output_t,source)]


########################################## DATA

for (country in c('argentina','brazil')){
  
  df_u = fread(paste0('../../data/landuse/clean/geographicunits_',country,'.csv.gz'))
  df_l = fread(paste0('../../data/landuse/clean/landuse_',country,'_county.csv.gz'))

  if (country=='argentina'){
    
    #Spatial unit crosswalk
    df_u$spatial_id = df_u$county_id
    df_u = df_u[, list(country, county_id, spatial_id, region, land_area_km2)]
    year_d = 2018
    
    #Land
    df = merge(df_l, df_u, by='county_id')
    df = df[, c('land_area','crops','forage'):= list(land_area_km2*100, crops_seasonal2+crops_permanent+crops_unaccounted,forage_seasonal+forage_permanent)][,c('pasture'):=list(pasture+forage)]
    setnames(df, old = c('land_area','pasture','crops'),  new = c('land_ha','pasture_ha','crops_ha'))
    df_land_a = df[year==year_d, list(country, spatial_id, land_ha, pasture_ha, crops_ha)][, lapply(.SD, sum, na.rm=TRUE), by=list(country, spatial_id)]
    
    #Output (crops)
    df = fread(paste0('../../data/production/clean/production_',country,'.csv.gz'))
    df = merge(df, df_u, by='county_id')
    df = df[year==year_d & commodity %in% c('soybean','maize'), list(country, spatial_id, commodity, production_t)][, commodity:='crops'][, lapply(.SD, sum, na.rm=TRUE), by=list(country, spatial_id, commodity)]
    df_oc = dcast(df, country + spatial_id ~ commodity, value.var = "production_t")
    setnames(df_oc, old = c('crops'),  new =  c('crops_output_t'))
    
    #Output (beef)
    df = fread(paste0('../../data/production/clean/cattlestock_',country,'_county_census.csv.gz'))
    df = merge(df, df_u, by='county_id')
    output_stock_ratio = 3.02/53.6
    df=df[, c('beef_output_t') := list(output_stock_ratio*total_cattlestock )]
    df_ob = df[year==year_d, list(country, spatial_id, beef_output_t)][, lapply(.SD, sum, na.rm=TRUE), by=list(country, spatial_id)]
    
    #Merge all output
    df_output_a = merge(df_ob,df_oc, by=c('country', 'spatial_id'), all.x=T)
    
  }else{
    
    #Spatial unit crosswalk
    setnames(df_u, old=c(spatial_id_b), new=c('spatial_id') )
    df_u = df_u[, list(country, county_id, spatial_id, region, land_area_km2)]
    year_d = 2017
    
    #Land
    df = merge(df_l, df_u, by='county_id')
    df = df[,c('land_area','crops','pasture'):=list(land_area_km2*100, crops_seasonal2+crops_permanent, pasture_natural+pasture_planted)]
    setnames(df, old = c('land_area','pasture','crops'),  new = c('land_ha','pasture_ha','crops_ha'))
    df_land_b = df[year==year_d, list(country, spatial_id, land_ha, pasture_ha, crops_ha)][, lapply(.SD, sum, na.rm=TRUE), by=list(country, spatial_id)]
    
    #Output (crops)
    df = fread(paste0('../../data/production/clean/production_',country,'.csv.gz'))
    df = merge(df, df_u, by='county_id')
    df = df[year==year_d & commodity %in% c('soybean','maize'), list(country, spatial_id, commodity, production_t)][, commodity:='crops'][, lapply(.SD, sum, na.rm=TRUE), by=list(country, spatial_id, commodity)]
    df_oc = dcast(df, country + spatial_id ~ commodity, value.var = "production_t")
    setnames(df_oc, old = c('crops'),  new =  c('crops_output_t'))
    
    #Output (beef)
    df = fread(paste0('../../data/production/clean/cattlestock_',country,'_county_census.csv.gz'))
    df = merge(df, df_u, by='county_id')
    output_stock_ratio = 9.71/196.47
    df=df[, c('beef_output_t') := list(output_stock_ratio*total_cattlestock )]
    df_ob = df[year==year_d, list(country, spatial_id, beef_output_t)][, lapply(.SD, sum, na.rm=TRUE), by=list(country, spatial_id)]
    
    #Merge all output
    df_output_b = merge(df_ob,df_oc, by=c('country', 'spatial_id'), all.x=T)
    
  }
  
}
  
#Land
df = rbind(df_land_a,df_land_b)
df$source='data'
df_land_d = df[,list(country,spatial_id,pasture_ha,crops_ha,source)]

#Output
df = rbind(df_output_a,df_output_b)
df$source='data'
df_output_d = df[,list(country,spatial_id,crops_output_t,beef_output_t,source)]


########################################## COMPARISON

df = merge(df_land_d, df_land_m, by=c('country','spatial_id'))
df_land = merge(df, df_w, by=c('country','spatial_id'))

df = merge(df_output_d, df_output_m, by=c('country','spatial_id'))
df_output = merge(df, df_w, by=c('country','spatial_id'))

n_scatter=1
for(scatter_var in c('Log agreage (pasture)','Log agreage (crops)','Log output (beef)','Log output (crops)') ){
  
  if(scatter_var=='Log agreage (pasture)'){
    df=df_land
    df$data = log(df$pasture_ha.x)
    df$model = log(df$pasture_ha.y)
    df$size_var = df$land_ha }
  
  if(scatter_var=='Log agreage (crops)'){
    df=df_land
    df$data = log(df$crops_ha.x)
    df$model = log(df$crops_ha.y)
    df$size_var = df$land_ha }
  
  if(scatter_var=='Log output (beef)'){
    df=df_output
    df$data = log(df$beef_output_t.x)
    df$model = log(df$beef_output_t.y)
    df$size_var = df$weight_beef }
  
  if(scatter_var=='Log output (crops)'){
    df=df_output
    df$data = log(df$crops_output_t.x)
    df$model = log(df$crops_output_t.y)
    df$size_var = df$weight_crops }

  df = df[is.finite(data) & is.finite(model) & data>quantile(df$data, 0.05, na.rm=T),]
  x_min=min(df$data)
  x_max=max(df$data)
  y_min=min(df$model)
  y_max=max(df$model)
  spec_m = lm(model ~ data,  data=df, weights = size_var)
  intercept = round(summary(spec_m)$coefficients[1],2)
  slope = round(summary(spec_m)$coefficients[2],3)
  rsq = round(summary(spec_m)$r.squared,3)

  fig = ggplot(data = df, aes(x = data, y=model) ) +
    geom_point(alpha = 0.1, aes(size=size_var)) +
    stat_poly_line(formula = y ~ x, aes(weight = size_var), se=T, color="black", linewidth=2) +
    theme_minimal() +
    xlim(x_min,x_max)+
    ylim(y_min,y_max)+
    labs(x='data', y='model', title=paste0(scatter_var) )+
    annotate("text", x = 0.7*(x_max-x_min)+x_min, y = 0.22*(y_max-y_min)+y_min, label = paste0("R-sq = ", rsq), hjust=0, size=4)+
    annotate("text", x = 0.7*(x_max-x_min)+x_min, y = 0.16*(y_max-y_min)+y_min, label = paste0("Slope = ", slope), hjust=0, size=4)+
    annotate("text", x = 0.7*(x_max-x_min)+x_min, y = 0.07*(y_max-y_min)+y_min, label = "— line of best fit", hjust=0, size=4,color='black')+
    annotate("text", x = 0.7*(x_max-x_min)+x_min, y = 0.01*(y_max-y_min)+y_min, label = "- - 45° line", hjust=0, size=4,color='black')+
    geom_abline(aes(slope=1, intercept=0), linewidth=1, linetype='dashed', color='black') +
    theme(axis.title.x = element_text(hjust = 0.95, vjust=-1, size=13),
          axis.title.y = element_text(hjust = 0.95, vjust= 2, size=13),
          axis.text.x = element_text(size=9),
          axis.text.y = element_text(size=9),
          plot.title = element_text(size=15, hjust = 0.5),
          aspect.ratio = 1,
          panel.grid.minor = element_blank(),
          legend.title = element_blank(),
          legend.position = 'bottom',
          legend.text=element_text(size=9))+
    guides(size = "none")
  
  if(n_scatter==1){fig1=fig}
  if(n_scatter==2){fig2=fig}
  if(n_scatter==3){fig3=fig}
  if(n_scatter==4){fig4=fig}

  n_scatter=n_scatter+1
  
}

figure=ggarrange(fig1,fig2,fig3,fig4, nrow=2, ncol=2)
ggsave(paste0(output_folder,'/figure5_fit.pdf'), width = 10, height = 10)


